- Finding Clusters with Cooperative Game Theory
- A research experiment of $^\dagger$Giovanni, $^\dagger$Kostya, $^\ddagger$Daniel and $^\ddagger$Lucas (ack $^\ddagger$Eduardo)
- $^\dagger$INRIA $\mid$ $^\ddagger$Federal University of Rio de Janeiro
- Demonstration on Web
In this notebook, we present preliminary results on the convergence and accuracy of clustering based on hedonic games.
import os

# Root directory holding one sub-folder per experiment run.
path = './experiments/'
# Every non-hidden entry is an experiment folder (skips e.g. .DS_Store).
experiments = [entry for entry in os.listdir(path) if not entry.startswith('.')]
for entry in experiments:
    print(f'- {entry}')
In this demonstration we illustrate our results using the Terrorist dataset. One can see images and details about the network here.
# Select the experiment folders to analyse (any subset of the listing above).
experiments = [
    'terrorists_190626071230', 'terrorists_190626071357',
    'terrorists_190626071417', 'terrorists_190626071259',
    'terrorists_190626071454', 'terrorists_190626071324',
    'terrorists_190626071132', 'terrorists_190626071503',
    'karate_190626072044', 'karate_190626072112',
    'karate_190626071626', 'karate_190626071609',
    'karate_190626071618',
]
def show_info(exp):
    """Print the contents of an experiment's infos.md file.

    Relies on the module-level ``path`` pointing at the experiments root.
    """
    # Context manager guarantees the file is closed even if read()/print()
    # raises (the original leaked the handle on exceptions).
    with open(path + exp + '/infos.md', 'r') as fh:
        print(fh.read())
# Dump the metadata of every selected experiment, separated by a rule.
for exp in experiments:
    print(exp.upper())
    show_info(exp)
    print('\n\n' + ('='*100) + '\n\n')
import numpy as np
import pandas as pd
import networkx as nx
import holoviews as hv
from holoviews import opts, dim
from bokeh.models import HoverTool
hv.extension('bokeh') # could be 'matplotlib' instead
# Shared plot options: 400x400 canvas with 10% padding around the data.
defaults = {'width': 400, 'height': 400, 'padding': 0.1}
hv.opts.defaults(opts.EdgePaths(**defaults),
                 opts.Graph(**defaults),
                 opts.Nodes(**defaults))
# Unique network names, obtained by stripping the 13-character
# '_YYMMDDhhmmss' timestamp suffix.  dict.fromkeys keeps first-seen order
# (guaranteed on Python 3.7+), replacing the original build-a-dict-of-None
# roundabout.
networks = list(dict.fromkeys(exp[:-13] for exp in experiments))
# Parsed graph per network name, filled in by show_network().
graphs = {}
def show_network(net):
    """Load one network and its ground truth, print summary statistics
    and return circular + Kamada-Kawai HoloViews layouts.

    Side effect: stores the parsed networkx graph in the module-level
    ``graphs`` dict under ``net``.
    """
    edges = pd.read_csv('./networks/'+net+'.csv', names=['source', 'target'], dtype=object)
    g_truth = pd.read_csv('./networks/ground_truth/'+net+'.csv', dtype=object)
    # Ground truth CSV: one column per node, single row of cluster labels.
    node_names = g_truth.columns.values
    clusters = g_truth.iloc[0]
    graphs[net] = nx.from_pandas_edgelist(edges)
    # Count cluster sizes while tagging each node with its ground truth.
    gt_clusters = {}
    for n, c in zip(node_names, clusters):
        # Bug fix: the first member of each cluster used to be counted as
        # 0, so every reported cluster size was off by one.
        gt_clusters[c] = gt_clusters.get(c, 0) + 1
        # .nodes[...] replaces the .node alias removed in networkx 2.4.
        graphs[net].nodes[n]['g_truth'] = c
    print(net.upper())
    print('Number of Nodes:', len(graphs[net].nodes))
    print('Number of Edges:', len(graphs[net].edges))
    # Renamed from 'nodes' to avoid shadowing the column array above.
    for cluster, size in gt_clusters.items():
        print("Number of Nodes inside cluster '{}': {}".format(cluster, size))
    print('\n\n' + ('='*100) +'\n\n')
    t_opts = opts.Graph(tools=['hover'], node_color=dim('g_truth'), cmap='YlGnBu',
                        xaxis=None, yaxis=None, edge_line_width=.7, title=net)
    circ = hv.Graph.from_networkx(graphs[net], nx.layout.circular_layout).opts(t_opts)
    kawai = hv.Graph.from_networkx(graphs[net], nx.layout.kamada_kawai_layout).opts(t_opts)
    return (circ + kawai)
# Build one (circular + Kamada-Kawai) layout pair per network and lay
# them out two columns wide with a single shared toolbar.
nets = show_network(networks[0])
for name in networks[1:]:
    nets += show_network(name)
nets.opts(merge_tools=True).cols(2)
def get_graph(net, state, iteration):
    """Return circular + spring layouts of ``net`` at one iteration, with
    each node tagged by its confusion-matrix category.

    ``state`` is the per-iteration membership table: one column per node,
    holding 'in'/'out' per iteration row.
    """
    # Fixed seed so the spring layout is reproducible between calls.
    np.random.seed(10)
    clus = 'Other Associates of Hijackers'  # TO-DO: AUTO DEFINE target cluster
    G = graphs[net[:-13]].copy()
    for n in G.nodes:
        # .nodes[...] replaces the .node alias removed in networkx 2.4.
        data = G.nodes[n]
        data['cluster'] = state[n][iteration]
        predicted_in = data['cluster'] == 'in'
        actually_in = data['g_truth'] == clus
        # Exhaustive if/elif so 'confusion' is always set (the original
        # four independent ifs left it undefined for unexpected states).
        if predicted_in and actually_in:
            data['confusion'] = 'True Positive'
        elif predicted_in:
            data['confusion'] = 'False Positive'
        elif actually_in:
            data['confusion'] = 'False Negative'
        else:
            data['confusion'] = 'True Negative'
    circular = hv.Graph.from_networkx(G, nx.circular_layout)
    spring = hv.Graph.from_networkx(G, nx.spring_layout, iterations=iteration)
    return (circular + spring)
def get_plot(net):
    """Build per-iteration HoloMaps (circular and spring layouts) for one
    experiment, colouring nodes by their confusion-matrix category."""
    palette = ['red', 'white', 'green', 'black']
    graph_opts = opts.Graph(tools=['hover'], node_color=dim('confusion'),
                            xaxis=None, yaxis=None, cmap=palette,
                            edge_line_width=.7, title=net, show_legend=True)
    state = pd.read_csv(path + net + '/states.csv', dtype=object)
    circular_frames = {}
    spring_frames = {}
    for i in range(len(state.values)):
        layouts = get_graph(net, state, i)
        circular_frames[i] = layouts[0].opts(graph_opts)
        spring_frames[i] = layouts[1].opts(graph_opts)
    c = hv.HoloMap(circular_frames, kdims='Iteration')
    s = hv.HoloMap(spring_frames, kdims='Iteration')
    return (c + s).opts(merge_tools=True)
# One (circular + spring) HoloMap pair per experiment, two per row.
plots = get_plot(experiments[0])
for name in experiments[1:]:
    plots += get_plot(name)
plots.cols(2)
def inst_accum(net):
    """Return a (scatter, curve) pair: instantaneous and accumulated gain
    per iteration for one experiment."""
    data = pd.read_csv(path + net + '/iterations.csv')
    x = np.arange(data.shape[0])
    inst_pts = hv.Scatter((x, np.array(data['Instantaneous Gain'])),
                          'Iteration', 'Instantaneous Gain')
    accum_pts = hv.Curve((x, np.array(data['Accumulated Gain'])),
                         'Iteration', 'Accumulated Gain')
    hover = HoverTool(tooltips=[('Experiment', net), ('Iteration', '$x'),
                                ('Gain', '$y')])
    # Both plots share styling; only the scatter has a point size.
    shared = dict(width=800, tools=[hover], padding=0.1,
                  color=hv.Cycle('Pastel1'), title=net[:-13], show_legend=True)
    inst_pts.opts(size=7, **shared)
    accum_pts.opts(**shared)
    return inst_pts, accum_pts
# Group the gain plots by network (experiment name minus its timestamp);
# '*' overlays runs of the same network onto one figure.
temp = {}
for exp in experiments:
    inst, accu = inst_accum(exp)
    key = exp[:-13]
    if key in temp:
        temp[key]['instantaneous'] *= inst
        temp[key]['accumulated'] *= accu
    else:
        temp[key] = {'instantaneous': inst, 'accumulated': accu}
# Flatten the per-network overlays into a single one-column layout.
graphics = []
for value in temp.values():
    graphics.append(value['instantaneous'])
    graphics.append(value['accumulated'])
plts = graphics[0]
for extra in graphics[1:]:
    plts += extra
plts.opts(merge_tools=True).cols(1)
Next, we illustrate the number of vertices and edges in the clusters.
def get_scatter(c, m, l, value, iters_axis):
    """One 'Quantity vs Iteration' scatter with the given colour ``c``,
    marker ``m`` and legend label ``l``."""
    styling = opts.Scatter(color=c, tools=['hover'], width=700, height=500,
                           marker=m, size=7, padding=0.1, bgcolor='#ffffd9')
    points = hv.Scatter((iters_axis, value), 'Iteration', 'Quantity', label=l)
    return points.opts(styling)
def verts_edges(net):
    """Overlay of vertex/edge counts inside and outside the cluster over
    the iterations of one experiment."""
    props = pd.read_csv(path + net + '/properties.csv')
    x = np.arange(props.shape[0])
    # (colour, marker, legend label, CSV column) for each series;
    # dark blue = inside the cluster, light blue = outside.
    series = [
        ('#081D57', '*', 'Vertices In', 'Verts In'),
        ('#081D57', 's', 'Edges In', 'Edges In'),
        ('#3FB5C3', '*', 'Vertices Out', 'Verts Out'),
        ('#3FB5C3', 's', 'Edges Out', 'Edges Out'),
    ]
    layers = [get_scatter(color, marker, label, np.array(props[col]), x)
              for color, marker, label, col in series]
    overlay = layers[0]
    for layer in layers[1:]:
        overlay = overlay * layer
    return overlay.opts(title=net)
# Stack the vertex/edge plots, one experiment per row.
plts = verts_edges(experiments[0])
for name in experiments[1:]:
    plts += verts_edges(name)
plts.opts(merge_tools=True).cols(1)
def rectangle(x=0, y=0, width=1, height=1):
    """Corner coordinates of an axis-aligned rectangle, counter-clockwise
    from (x, y), as a (4, 2) numpy array."""
    corners = [(x, y),
               (x + width, y),
               (x + width, y + height),
               (x, y + height)]
    return np.array(corners)
def get_image(net):
    """Heat-map-like view of an experiment's state matrix: one coloured
    unit square per (node, iteration) cell, coloured by cluster label."""
    state = pd.read_csv(path + net + '/states.csv', dtype=object)
    cluster_map = []
    # enumerate replaces the original index loops; the unused lookup of
    # state.columns[c] has been dropped.
    for i, iteration in enumerate(state.values):
        for c, cluster in enumerate(iteration):
            # Unit square at column c (node), row i (iteration).
            cluster_map.append({('x', 'y'): rectangle(c, i),
                                'cluster': cluster})
    hover = HoverTool(tooltips=[('cluster', '@cluster'), ('Node', '$x'),
                                ('Iteration', '$y')])
    ops = opts.Polygons(
        cmap='YlGnBu', tools=[hover], padding=0.1, line_width=0.25, width=800,
        xlabel='Nodes', ylabel='Iterations', invert_yaxis=True, xaxis='top',
        show_legend=True, title=net,
        xlim=(0, len(state.values[0])), ylim=(0, len(state.values)))
    return hv.Polygons(cluster_map, vdims='cluster').opts(ops)
# All state images stacked into a single column.
plts = get_image(experiments[0])
for name in experiments[1:]:
    plts += get_image(name)
plts.opts(merge_tools=True).cols(1)
get_image(experiments[6])  # When printed alone, the scale is correct
This does not yet support a list of experiments (currently only the first experiment in the list is used), because the metrics will be changed to Adjusted Mutual Information.
# Confusion-matrix counts for the FIRST experiment only.  In accuracy.csv
# every group of four consecutive columns belongs to one node pair, in the
# order (tp, tn, fp, fn); values[1:] skips a sub-header row.
acc = pd.read_csv(path + experiments[0] + '/accuracy.csv')
iters_axis = np.arange(acc.shape[0])
# The pair label sits on the first column of each 4-column group; a
# comprehension replaces the original manual counter loop.
groups = [acc.columns[g * 4] for g in range(len(acc.columns) // 4)]
combinations = {}
for g, group in enumerate(groups):
    column = g * 4
    combinations[group] = {
        'tp' : acc.iloc[:, column + 0].values[1:],
        'tn' : acc.iloc[:, column + 1].values[1:],
        'fp' : acc.iloc[:, column + 2].values[1:],
        'fn' : acc.iloc[:, column + 3].values[1:]}
# Shared hover for the confusion-matrix scatters below.
tooltips = [('iteration', '@x'), ('quantity', '$y')]
hover = HoverTool(tooltips=tooltips)
def get_graphic(value, lab, c):
    """Scatter of one confusion-matrix series over the iterations axis."""
    points = hv.Scatter((iters_axis, value), label=lab)
    return points.opts(tools=[hover], ylim=(0, 100), color=c, size=7,
                       width=700, height=500, xlabel='Iterations',
                       ylabel='Quantity', padding=0.1)
def get_plot(tp, tn, fp, fn):
    """Overlay the four confusion-matrix series in one figure.

    NOTE(review): this redefines the earlier get_plot(net); only this
    version is visible from here on.
    """
    layers = [get_graphic(tp, 'True Positive', 'green'),
              get_graphic(tn, 'True Negative', 'yellow'),
              get_graphic(fp, 'False Positive', 'black').opts(marker='x'),
              get_graphic(fn, 'False Negative', 'red').opts(marker='x')]
    # Overlay order matches the original: tp * tn * fn * fp.
    return layers[0] * layers[1] * layers[3] * layers[2]
# Build one overlaid confusion plot per node pair.
plots = []
for pair, values in combinations.items():
    # Bug fix: fp and fn used to be passed swapped into get_plot(tp, tn,
    # fp, fn), so the False Positive and False Negative series carried
    # each other's labels and colours.
    plots.append(get_plot(values['tp'], values['tn'],
                          values['fp'], values['fn']).opts(
        title='Pair: ' + pair))
final = plots[0]
for extra in plots[1:]:
    final += extra
final.opts(merge_tools=True).cols(1)
Theoretically, it could be $0.05$, which is $\frac{1}{20}$ (given that $\alpha = \frac{19}{20}$)
# Smallest instantaneous gain observed across all selected experiments.
min_gain = float('Inf')
for exp in experiments:
    gains = np.array(pd.read_csv(path + exp + '/iterations.csv')['Instantaneous Gain'])
    smallest = min(gains)
    print(f'- {exp}: {smallest}')
    min_gain = min(min_gain, smallest)
print(f'Minimum instantaneous gain of all experiments in the list: {min_gain}')
def get_last_state(exp):
    """Concatenate the final row of an experiment's states.csv into one
    string, used as a fingerprint of the final partition."""
    state = pd.read_csv(path + exp + '/states.csv')
    # str.join instead of repeated += (quadratic in the worst case).
    return ''.join(state.iloc[-1].values)
# Count how often each distinct final state occurs, per network.
results = {}
for exp in experiments:
    per_network = results.setdefault(exp[:-13], {})
    last = get_last_state(exp)
    per_network[last] = per_network.get(last, 0) + 1
# Report how diverse the final states reached by each network were.
for key, value in results.items():
    total = sum(value.values())
    print(f'Network {key.upper()} have {len(value.keys())} distinct final states ' +
          f'out of {total} experiments: {len(value.keys()) / total * 100}%')
# Number of iterations each run took, grouped by network.
iter_sizes = {}
for exp in experiments:
    run = pd.read_csv(path + exp + '/iterations.csv')
    size = len(np.array(run['Instantaneous Gain']))
    iter_sizes.setdefault(exp[:-13], []).append(size)
def get_histogram(values):
    """Five-bin histogram of the given iteration counts."""
    frequencies, edges = np.histogram(values, 5)
    return hv.Histogram((edges, frequencies))
# One iteration-count histogram per network, combined into one layout.
plt = [get_histogram(value).opts(title=key)
       for key, value in iter_sizes.items()]
hists = plt[0]
for extra in plt[1:]:
    hists += extra
hists